// -*- mode: Rust; -*-

use super::generate_class;
use super::lexer;
use super::lexer::{Token, LexicalError};
use regex_syntax::hir::{self, Hir};

// Grammar parameter: the original, untokenized input is passed in
// next to the token stream to facilitate error recovery.
//
// NOTE(review): none of the actions visible in this file read
// `input` directly -- confirm how the caller makes use of it.
grammar<'input>(input: &'input str);

// This is a straightforward translation of the regular expression
// grammar from section 8 of RFC 9580 (previously section 8 of
// RFC 4880).
//
// https://www.rfc-editor.org/rfc/rfc9580.html#section-8
pub(crate) Regex : Hir = {
    // A regex is one branch followed by zero or more `|`-prefixed
    // branches.
    <first:LBranch> <rest:RBranch*> => {
        // Collect all branches, leftmost first.
        let mut branches = Vec::with_capacity(rest.len() + 1);
        branches.push(first);
        branches.extend(rest);

        // An empty branch matches everything, so the alternation as
        // a whole collapses to the empty expression.
        //
        // This short circuit is not just an optimization: version
        // 1.3.7 of the regex crate (the version shipped in Debian
        // Bullseye) requires it.  See issue #694 for details.
        let has_empty_branch = branches
            .iter()
            .any(|branch| matches!(branch.kind(), hir::HirKind::Empty));

        if has_empty_branch {
            Hir::empty()
        } else {
            Hir::alternation(branches)
        }
    },
}

// The leftmost branch of an alternation: a plain `Branch`, passed
// through unchanged.
LBranch : Hir = {
    <branch:Branch> => branch,
}

// Any further branch of an alternation: a `|` followed by a
// `Branch`.  Only the branch is kept; the `|` is dropped.
RBranch : Hir = {
    PIPE <branch:Branch> => branch,
}

// A branch is a possibly empty sequence of pieces.
Branch : Hir = {
    // No pieces at all: the empty expression.
    => Hir::empty(),

    // One or more pieces: concatenate them, unless every single one
    // of them is the empty expression, in which case the branch is
    // simply empty, too.
    <pieces:Piece+> => {
        let has_content = pieces
            .iter()
            .any(|piece| !matches!(piece.kind(), hir::HirKind::Empty));

        if has_content {
            Hir::concat(pieces)
        } else {
            Hir::empty()
        }
    },
}

// A piece is an atom, optionally followed by one of the repetition
// operators `*`, `+` or `?`.  All repetitions are greedy.
//
// Repeating the empty expression yields the empty expression again,
// so in that case the atom is handed back untouched instead of being
// wrapped in a repetition node.
Piece : Hir = {
    // A bare atom.
    <atom:Atom> => atom,

    // `atom*`: zero or more occurrences.
    <atom:Atom> STAR => {
        if matches!(atom.kind(), hir::HirKind::Empty) {
            atom
        } else {
            let sub = Box::new(atom);
            Hir::repetition(hir::Repetition {
                min: 0,
                max: None,
                greedy: true,
                sub,
            })
        }
    },

    // `atom+`: one or more occurrences.
    <atom:Atom> PLUS => {
        if matches!(atom.kind(), hir::HirKind::Empty) {
            atom
        } else {
            let sub = Box::new(atom);
            Hir::repetition(hir::Repetition {
                min: 1,
                max: None,
                greedy: true,
                sub,
            })
        }
    },

    // `atom?`: zero or one occurrence.
    <atom:Atom> QUESTION => {
        if matches!(atom.kind(), hir::HirKind::Empty) {
            atom
        } else {
            let sub = Box::new(atom);
            Hir::repetition(hir::Repetition {
                min: 0,
                max: Some(1),
                greedy: true,
                sub,
            })
        }
    },
}

// An atom is the smallest unit a repetition operator can apply to.
Atom : Hir = {
    // A parenthesized regex; the inner expression is returned as is.
    LPAREN <inner:Regex> RPAREN => inner,

    // A bracket expression, e.g. `[a-z]`.
    Range,

    // `.` matches any character.
    DOT => Hir::dot(hir::Dot::AnyChar),
    // `^` anchors at the start.
    CARET => Hir::look(hir::Look::Start),
    // `$` anchors at the end.
    DOLLAR => Hir::look(hir::Look::End),

    // A backslash followed by any character matches that character
    // literally.
    BACKSLASH <escaped:AnyChar> => {
        // Four bytes suffice to hold the UTF-8 encoding of any char:
        //
        // https://doc.rust-lang.org/std/primitive.char.html#method.encode_utf8
        let mut utf8 = [0; 4];
        let encoded = escaped.to_char().encode_utf8(&mut utf8);
        Hir::literal(encoded.as_bytes())
    },

    // Outside of a bracket expression, `-` is an ordinary character.
    DASH => Hir::literal("-".as_bytes()),

    // Any other character matches itself.
    <plain:OTHER> => {
        // Four bytes suffice to hold the UTF-8 encoding of any char:
        //
        // https://doc.rust-lang.org/std/primitive.char.html#method.encode_utf8
        let mut utf8 = [0; 4];
        let encoded = plain.to_char().encode_utf8(&mut utf8);
        Hir::literal(encoded.as_bytes())
    },

}

// A bracket expression.  The characters between the brackets are
// handed to `generate_class` together with a flag saying whether the
// expression started with a `^`.
Range : Hir = {
    // `[]...]` and `[^]...]`: a `]` directly following the opening
    // bracket (or the `^`) is a member of the class rather than
    // the closing bracket.
    LBRACKET <caret:CARET?> <head:RBRACKET> <tail:NotRBracket*> RBRACKET => {
        let first = std::iter::once(head.to_char());
        let remaining = tail.into_iter().map(|token| token.to_char());
        generate_class(caret.is_some(), first.chain(remaining))
    },

    // `[^...]`: a leading `^` followed by at least one member.
    LBRACKET CARET <members:NotRBracket+> RBRACKET => {
        let chars = members.into_iter().map(|token| token.to_char());
        generate_class(true, chars)
    },

    // `[...]`: the first member is neither `^` nor `]`.
    LBRACKET <head:NotCaretNotRBracket> <tail:NotRBracket*> RBRACKET => {
        let first = std::iter::once(head.to_char());
        let remaining = tail.into_iter().map(|token| token.to_char());
        generate_class(false, first.chain(remaining))
    },
}

// Any single token except RBRACKET.
//
// `Range` uses this for the members of a bracket expression.  Every
// token that acts as an operator elsewhere in the grammar is
// rewritten to `Token::OTHER` carrying its literal character; an
// `OTHER` token is passed through unchanged.
NotRBracket : Token = {
    PIPE => Token::OTHER('|'),

    STAR => Token::OTHER('*'),
    PLUS => Token::OTHER('+'),
    QUESTION => Token::OTHER('?'),

    LPAREN => Token::OTHER('('),
    RPAREN => Token::OTHER(')'),

    DOT => Token::OTHER('.'),
    CARET => Token::OTHER('^'),
    DOLLAR => Token::OTHER('$'),
    BACKSLASH => Token::OTHER('\\'),

    LBRACKET => Token::OTHER('['),
    // RBRACKET is intentionally not accepted here.
    DASH => Token::OTHER('-'),

    OTHER,
}

// Any single token except CARET and RBRACKET.
//
// `Range` uses this for the first member of a bracket expression
// that starts with neither `^` nor `]`.  Every token that acts as an
// operator elsewhere in the grammar is rewritten to `Token::OTHER`
// carrying its literal character; an `OTHER` token is passed through
// unchanged.
NotCaretNotRBracket : Token = {
    PIPE => Token::OTHER('|'),

    STAR => Token::OTHER('*'),
    PLUS => Token::OTHER('+'),
    QUESTION => Token::OTHER('?'),

    LPAREN => Token::OTHER('('),
    RPAREN => Token::OTHER(')'),

    DOT => Token::OTHER('.'),
    // CARET is intentionally not accepted here.
    DOLLAR => Token::OTHER('$'),
    BACKSLASH => Token::OTHER('\\'),

    LBRACKET => Token::OTHER('['),
    // RBRACKET is intentionally not accepted here.
    DASH => Token::OTHER('-'),

    OTHER,
}

// Any single token at all.
//
// `Atom` uses this for the character following a backslash.  Every
// token that acts as an operator elsewhere in the grammar is
// rewritten to `Token::OTHER` carrying its literal character; an
// `OTHER` token is passed through unchanged.
AnyChar : Token = {
    PIPE => Token::OTHER('|'),

    STAR => Token::OTHER('*'),
    PLUS => Token::OTHER('+'),
    QUESTION => Token::OTHER('?'),

    LPAREN => Token::OTHER('('),
    RPAREN => Token::OTHER(')'),

    DOT => Token::OTHER('.'),
    CARET => Token::OTHER('^'),
    DOLLAR => Token::OTHER('$'),
    BACKSLASH => Token::OTHER('\\'),

    LBRACKET => Token::OTHER('['),
    RBRACKET => Token::OTHER(']'),
    DASH => Token::OTHER('-'),

    OTHER,
}

// Interface to the external (hand-written) lexer in `super::lexer`.
extern {
    // Positions are reported as `usize` values.
    type Location = usize;
    // Errors raised by the lexer.
    type Error = LexicalError;

    // Maps each terminal used in the grammar above to the pattern of
    // the `lexer::Token` variant that it stands for.
    enum lexer::Token {
        PIPE => lexer::Token::PIPE,

        STAR => lexer::Token::STAR,
        PLUS => lexer::Token::PLUS,
        QUESTION => lexer::Token::QUESTION,

        LPAREN => lexer::Token::LPAREN,
        RPAREN => lexer::Token::RPAREN,

        DOT => lexer::Token::DOT,
        CARET => lexer::Token::CARET,
        DOLLAR => lexer::Token::DOLLAR,
        BACKSLASH => lexer::Token::BACKSLASH,

        LBRACKET => lexer::Token::LBRACKET,
        RBRACKET => lexer::Token::RBRACKET,
        DASH => lexer::Token::DASH,

        // Matches `OTHER` regardless of the character it carries.
        OTHER => lexer::Token::OTHER(_),
    }
}
